home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_200
/
236_01
/
sgrep.c
< prev
next >
Wrap
Text File
|
1989-06-05
|
21KB
|
829 lines
/*
HEADER: CUG236;
TITLE: SGREP Text Pattern Replacer (McKe86);
DATE: 05/17/1987;
DESCRIPTION: "Is a modified version of the CUG 152 grep program,
with the added features of string substitution,
multiple pattern search, case sensitivity, and scanning
option.";
VERSION: 1.0;
KEYWORDS: Text filter;
FILENAME: SGREP.C;
SEE-ALSO: SGREP.DOC;
COMPILERS: vanilla;
AUTHORS: J. McKeon;
*/
/*
*
* The information in this document is subject to change
* without notice and should not be construed as a commitment
* by Digital Equipment Corporation or by DECUS.
*
* Neither Digital Equipment Corporation, DECUS, nor the authors
* assume any responsibility for the use or reliability of this
* document or the described software.
*
* Copyright (C) 1980, DECUS
*
* General permission to copy or modify, but not for profit, is
* hereby granted, provided that the above copyright notice is
* included and reference made to the fact that reproduction
* privileges were granted by DECUS.
*
*/
#include "stdio.h"
/*
* grep.
*
* Runs on the Decus compiler or on vms.
* Converted for BDS compiler (under CP/M-80), 20-Jan-83, by Chris Kern.
* Converted to IBM PC with CI-C86 C Compiler June 1983 by David N. Smith
* On vms, define as:
* grep :== "$disk:[account]grep" (native)
* grep :== "$disk:[account]grep grep" (Decus)
*
* sgrep.
*
* Addition of string substitution capability,
* multiple pattern search, upper-lower case option, scanning options,
* and name changed to sgrep, April 1986 by James J. McKeon.
* Runs on IBM PC or compatibles using ECO C88 compiler.
*
* For help type "sgrep ?". See below for more information.
*
* May 1987, William C. Colley, III -- Fixed bug in usage of function fgets()
* in function compat(). Added several notes about portability to various
* compilers. Fixed a glitch in case STAR of function pmatch() that crashed
* the program when compiled under MSDOS large model. The glitch occurred
* when a pointer was (deliberately) run backward past the beginning of an
* array of char. The array was exactly on a paragraph boundary, so the
* pointer decremented from 0x0e5d0000 to 0x0e5dffff instead of to 0x0e5cffff
* as expected. A pointer comparison then turned out bogus data and the
* program went into an infinite loop. The moral of this story is: Don't
* play fast and loose with pointers if you want portability.
*/
/*
* Portability Note: The AZTEC C compilers handle the binary/text file
* dichotomy differently from most other compilers. Uncomment the following
* pair of #defines if you are running AZTEC C:
*/
/*
#define getc(f) agetc(f)
#define putc(c,f) aputc(c,f)
*/
/*
* Portability Note: 8-bit systems often don't have header file ctype.h.
* If your system doesn't have it, uncomment the #define NO_CTYPE_H.
*/
/*
#define NO_CTYPE_H
*/
#ifdef NO_CTYPE_H
int isalpha(), isalnum(), isdigit(), tolower();
#else
#include <ctype.h>
#endif
/*
 * General usage text: one string per output line, covering the command
 * syntax and the single-letter flags.  The list is terminated by a 0
 * (null pointer) entry.  main() hands this table to help(), followed by
 * patdoc, when the program is invoked with "?" as its only argument.
 * NOTE(review): help() is not visible here; presumably it prints each
 * string on its own line until it reaches the 0 entry -- confirm.
 */
char *documentation[] = {
"Sgrep searches a file for a given pattern and substitutes a pattern.",
"If no substitution is required, the command sgrep -myn corresponds",
"to grep -n and will match only. Output is to the screen and may be",
"re-directed using \">\" at the DOS level. Execute by",
" sgrep [flags] pattern-file input-file",
"",
"Flags are single characters preceeded by '-':",
" -c Only a count of matching lines is printed",
" -m Match patterns only, no substitutions.",
" -n Each line is preceeded by its line number",
" -v Only print non-matching lines",
" -y Upper and lower case match.",
"",
0 };	/* 0 marks the end of the table */
/*
 * Pattern-language reference text: one string per output line, in three
 * sections (match patterns, scanning options, substitute patterns).  The
 * list is terminated by a 0 (null pointer) entry.  main() passes this
 * table to help() right after documentation when the only argument is "?".
 * Backslashes and double quotes are escaped for the C compiler, so "\\"
 * in the source is a single backslash in the text the user sees.
 */
char *patdoc[] = {
"Each match pattern is on a separate line followed by its substitute",
"pattern, if any, also on a separate line.",
" MATCH PATTERNS",
"The regular_expression defines the pattern to search for. Upper and",
"lower-case are regarded as different unless -y option is used.",
"x An ordinary character (not mentioned below) matches that character.",
"'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
"'$' Matches beginning or end of line.",
"'.' A period matches any character except \"new-line\".",
"':a' A colon matches a class of characters described by the following",
"':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
"':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
"': ' other control characters except new-line.",
"'*' An expression followed by an asterisk matches zero or more",
" occurrances of that expression: \"fo*\" matches \"f\", \"fo\"",
" \"foo\", etc.",
"'+' An expression followed by a plus sign matches one or more",
" occurrances of that expression: \"fo+\" matches \"fo\", etc.",
"'-' An expression followed by a minus sign optionally matches",
" the expression.",
"'[]' A string enclosed in square brackets matches any character in",
" that string, but no others. If the first character in the",
" string is a circumflex, the expression matches any character",
" except \"new-line\" and the characters in the string. For",
" example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
" matches \"abc\" but not \"axb\". A range of characters may be",
" specified by two characters separated by \"-\". Note that,",
" [a-z] matches alphabetics, while [z-a] never matches.",
"The concatenation of regular expressions is a regular expression.",
" Scanning options:",
"The default scanning option is match all occurences of the pattern.",
"'@' at the beginning of the pattern, is equivalent to",
" \"$: *\", meaning match first non-whitespace pattern.",
"'@e' at the end of a pattern means that if a match is",
" found (and a substitution made), end all pattern search",
" on current line.",
"'@r' at the end of a pattern means that after a match",
" (and substitution), rescan line until no match occurs.",
" SUBSTITUTE PATTERNS ",
"The only control character for substitute patterns is \"?\".",
"Any other character represents itself. Only the characters ? and",
" \\ itself need to be quoted.",
"'?n' where n is a non-zero digit, indicates the position where",
" the nth wildcard string is to be placed.",
"A wildcard string is any string which is not completely fixed,",
"both as to the characters and number of characters in the string.",
0};	/* 0 marks the end of the table */
/*
 * Fixed capacities of the global work areas:
 *   LMAX    - size, in chars, of lbuf and slbuf.
 *   PMAX    - size, in chars, of pbuf and spbuf.
 *   NPATMAX - number of entries in the mpp and mpsp pointer tables.
 */
#define LMAX 100
#define PMAX 2000
#define NPATMAX 100
/*
 * Small integer codes, numbered consecutively from CHR (1) to ENDPAT (15).
 * NOTE(review): the code that reads and writes these values lies outside
 * this part of the file.  Presumably they tag the elements of a compiled
 * match pattern, with names that appear to mirror the syntax listed in
 * patdoc (ANY for '.', STAR for '*', PLUS for '+', MINUS for '-',
 * CLASS/NCLASS for "[]" and "[^]", and so on) -- confirm against the
 * pattern compiler and matcher before relying on this.
 */
#define CHR 1
#define SCOP 2
#define BEOL 3
#define ANY 4
#define CLASS 5
#define NCLASS 6
#define STAR 7
#define PLUS 8
#define MINUS 9
#define ALPHA 10
#define DIGIT 11
#define NALPHA 12
#define PUNCT 13
#define RANGE 14
#define ENDPAT 15
/*
 * lowopt(x): case folding controlled by the global yflag.
 * Yields tolower(x) when yflag is non-zero and x unchanged otherwise.
 *
 * Both the argument and the whole expansion are parenthesized so the
 * macro behaves as a single operand inside a larger expression.  Without
 * the outer parentheses, "lowopt(a) == b" would parse as
 * "yflag ? tolower(a) : (a == b)" because ?: binds more loosely than the
 * relational and arithmetic operators.
 */
#define lowopt(x) (yflag ? tolower(x) : (x))
/*
 * Command-line switch counters; all start at zero.  main() increments
 * cflag, mflag and nflag when it sees the letters c, m and n in the flag
 * argument.  yflag is the switch consulted by the lowopt() macro.
 * NOTE(review): the code that sets vflag and yflag is outside the visible
 * part of main(); presumably they follow the -v and -y switches.
 */
int cflag, mflag, nflag, vflag, yflag;

/* Debug level; incremented by the d switch in main(). */
int debug = 0;

/* NOTE(review): presumably cursors into pbuf and spbuf -- confirm. */
char *pp;
char *psp;

#ifndef vms
/* Only present when not building for vms. */
char file_name[81];
#endif

/* Work buffers of LMAX chars each. */
char lbuf[LMAX];
char slbuf[LMAX];

/* Work buffers of PMAX chars each. */
char pbuf[PMAX];
char spbuf[PMAX];

/* Location of wildcard strings: 19 entries of two char pointers each. */
char *wldstr[19][2];

int nws;
int npat;
int rescan;
int nxtln;

/* Location of patterns: up to NPATMAX entries in each table. */
char *mpp[NPATMAX];
char *mpsp[NPATMAX];

/* Declared by hand; this file includes no header that declares exit(). */
void exit();
/*******************************************************/
int main(argc, argv)
int argc;
char *argv[];
{
register char *p;
register int c;
FILE *f;
void cant(), compat(), grep(), help(), usage();
if (argc <= 1)
usage("No arguments");
if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
help(documentation);
help(patdoc);
return !0;
}
p=argv[argc-1];
if ((f=fopen(p, "r")) == NULL) cant(p);
p = argv[1];
if (*p == '-') {
++p;
while (c = *p++) {
switch(tolower(c)) {
case 'c':
++cflag;
break;
case 'd':
++debug;
break;
case 'm':
++mflag;
break;
case 'n':
++nflag;
break;
case 'v':